from pyspark.context import SparkContext

# Create a local-mode SparkContext for this demo.
sc = SparkContext(master="local", appName="word_count")

# Load the students file as an RDD of text lines, asking for at least 3
# partitions. NOTE(review): path is relative to the working directory —
# confirm it resolves when the script is launched from elsewhere.
students_rdd = sc.textFile("../../data/students.txt", minPartitions=3)

# Report how many partitions the RDD actually received.
print(f'student_rdd:{students_rdd.getNumPartitions()}')


def map_fun(lines):
    """Extract the last comma-separated field from every line of a partition.

    Intended for ``RDD.mapPartitions``: Spark calls it once per partition,
    passing an iterator over that partition's lines, so the renamed
    parameter (previously ``iter``, which shadowed the builtin) is still
    received positionally.

    :param lines: iterable of comma-separated text lines
    :return: list of the last field of each line
    """
    # Side-effect print shows (in the executor's stdout) that the function
    # runs once per partition rather than once per record.
    print('map_fun')
    return [line.split(',')[-1] for line in lines]

# Run map_fun over each partition and print every extracted field.
# foreach executes on the executors; in local mode the output appears in
# this process's stdout.
students_rdd.mapPartitions(map_fun).foreach(print)

def map_fun_index(index, lines):
    """Extract the last comma-separated field from each line of a partition.

    Intended for ``RDD.mapPartitionsWithIndex``: Spark calls it once per
    partition with the partition's index and an iterator over its lines
    (both passed positionally, so renaming ``iter`` -> ``lines`` is safe
    and stops shadowing the builtin).

    :param index: zero-based partition index
    :param lines: iterable of comma-separated text lines
    :return: list of the last field of each line
    """
    # Shows which partition this invocation is processing.
    print(f'map_fun:{index}')
    return [line.split(',')[-1] for line in lines]
# Same as above but the partition function also receives the partition
# index; prints every extracted field via foreach on the executors.
students_rdd.mapPartitionsWithIndex(map_fun_index).foreach(print)